library(tidyverse)
library(readxl)
options(scipen=0, digits=7) #to standardize number of digits when exporting data. all data here is run using these options.

# City Crime ---------
LESO_path <- "raw-data/LESO-Controls"
tempcitycrime = list.files(path = LESO_path,pattern="citycrime20[0-9][0-9].xls")
tempcitycrime <- lapply(file.path(LESO_path,tempcitycrime), read_excel) 

crime <- tempcitycrime %>%
  plyr::rbind.fill(.) %>%
  rename(Violent_crime=`Violent crime`,Murder = `Murder and nonnegligent manslaughter`,
         Rape_revised_def = `Rape (revised definition)`, 
         Rape_legacy_def = `Rape (legacy definition)`,Assault=`Aggravated assault`,
         Property_crime= `Property crime`,MV_theft=`Motor vehicle theft`) %>%
  mutate(allcrime = rowSums(select(., contains("crime")),na.rm=T),
         allcrimewithna = rowSums(select(., contains("crime"))),
         State = str_to_title(State)) %>% #title case 
  filter(Population!=0) # remove if population is missing or equal to 0

# Main outcome variable: crime rates by different types of crime per 100,000 -----
crimerates <- crime %>%
  group_by(City,State,year) %>%
  mutate_all(funs(rate=(./Population)*100000)) %>%
  distinct()

save(crimerates, file="data/crimerates.RData")

##---------------------------------------##
## county level crime ------
tempcntycrime <- list.files(path = LESO_path,pattern="countysumcrime20[0-9][0-9].xls")
tempcntycrime <- lapply(file.path(LESO_path,tempcntycrime), read_excel)

countycrime <- tempcntycrime %>%
  plyr::rbind.fill(.) %>% 
  mutate(state.fips = str_pad(state.fips,width=2,pad=0,side="left"),
         county.fips = str_pad(county.fips,width=2,pad=0,side="left")) %>%
  group_by(year,state.fips,county.fips)%>%
  mutate(VIOL= ifelse(is.na(VIOL), sum(MURDER, RAPE, ROBBERY,AGASSLT), VIOL),
         PROPERTY= ifelse(is.na(PROPERTY), sum(BURGLRY, LARCENY, MVTHEFT), PROPERTY),
         allcrime = VIOL+PROPERTY)  

#get the main dependent variables of interest (crime rates)
countycrimerates <- countycrime %>%
  group_by(county.fips,state.fips,year) %>%
  mutate_all(funs(countysumrate=(./CPOPARST)*100000)) %>%
  select(-INDEX_countysumrate,-MODINDX_countysumrate) %>%
  mutate(state_county_fips = paste0(state.fips,county.fips),
         missingdata = ifelse(COVIND==0 & allcrime==0,1,0),
         lessthan100coverage = ifelse(COVIND<100,1,0)) 

# Load in fips codes to merge with county ---------
countycrimerates <- read_csv("raw-Data/countyfips.csv") %>%
  mutate(state.fips = str_pad(state.fips,width=2,pad=0,side="left"),
         county.fips = str_pad(county.fips,width=2,pad=0,side="left")) %>%
  left_join(countycrimerates,.) %>%
  select(State,County,state.fips,county.fips,year,VIOL_countysumrate, MURDER_countysumrate,
         RAPE_countysumrate, RAPE_countysumrate, ROBBERY_countysumrate,
         AGASSLT_countysumrate, PROPERTY_countysumrate, BURGLRY_countysumrate,
         LARCENY_countysumrate, MVTHEFT_countysumrate, ARSON_countysumrate,
         allcrime_countysumrate,missingdata,lessthan100coverage) %>%
  arrange(State,County) %>%
  filter(!is.na(State)) # There are 3 counties in AK that are in the crime data but aren't listed as real fips codes.
                        # These must be mistakes/typos in the crime data. Delete these.
 
save(countycrimerates, file='data/crime_county.RData') 
